{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### from : https://martin-thoma.com/word-error-rate-calculation/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def wer(r, h):\n",
    "    \"\"\"\n",
    "    Calculation of WER with Levenshtein distance.\n",
    "\n",
    "    Works only for iterables up to 254 elements (uint8).\n",
    "    O(nm) time ans space complexity.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    r : list\n",
    "    h : list\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    int\n",
    "\n",
    "    Examples\n",
    "    --------\n",
    "    >>> wer(\"who is there\".split(), \"is there\".split())\n",
    "    1\n",
    "    >>> wer(\"who is there\".split(), \"\".split())\n",
    "    3\n",
    "    >>> wer(\"\".split(), \"who is there\".split())\n",
    "    3\n",
    "    \"\"\"\n",
    "    # initialisation\n",
    "    import numpy\n",
    "    d = numpy.zeros((len(r)+1)*(len(h)+1), dtype=numpy.uint8)\n",
    "    d = d.reshape((len(r)+1, len(h)+1))\n",
    "    for i in range(len(r)+1):\n",
    "        for j in range(len(h)+1):\n",
    "            if i == 0:\n",
    "                d[0][j] = j\n",
    "            elif j == 0:\n",
    "                d[i][0] = i\n",
    "\n",
    "    # computation\n",
    "    for i in range(1, len(r)+1):\n",
    "        for j in range(1, len(h)+1):\n",
    "            if r[i-1] == h[j-1]:\n",
    "                d[i][j] = d[i-1][j-1]\n",
    "            else:\n",
    "                substitution = d[i-1][j-1] + 1\n",
    "                insertion    = d[i][j-1] + 1\n",
    "                deletion     = d[i-1][j] + 1\n",
    "                d[i][j] = min(substitution, insertion, deletion)\n",
    "\n",
    "    return d[len(r)][len(h)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\n",
      "3\n",
      "3\n"
     ]
    }
   ],
   "source": [
    "print wer(\"who is there\".split(), \"is there\".split())\n",
    "print wer(\"who is there\".split(), \"\".split())\n",
    "print wer(\"\".split(), \"who is there\".split())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5531\n",
      "['Excuse', 'me.']\n"
     ]
    }
   ],
   "source": [
    "list_ref = []\n",
    "\n",
    "with open('../data/processed/IEMOCAP/four_category/FC_trans.txt') as f:\n",
    "    list_ref = f.readlines()\n",
    "list_ref = [ x.strip() for x in list_ref ]\n",
    "list_ref = [ [ t for t in x.split(' ')] for x in list_ref ]\n",
    "print len(list_ref)\n",
    "print list_ref[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5531\n",
      "['give', 'me']\n"
     ]
    }
   ],
   "source": [
    "list_target = []\n",
    "\n",
    "\n",
    "with open('../data/processed/IEMOCAP/four_category_G/FC_trans.txt') as f:\n",
    "    list_target = f.readlines()\n",
    "list_target = [ x.strip() for x in list_target ]\n",
    "list_target = [ [ t for t in x.split(' ')] for x in list_target ]\n",
    "print len(list_target)\n",
    "print list_target[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5.5367926234\n"
     ]
    }
   ],
   "source": [
    "list_wer = []\n",
    "\n",
    "for i in xrange( len(list_ref )) :\n",
    "    list_wer.append( wer(list_ref[i], list_target[i])  )\n",
    "print np.mean(list_wer)    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "4.92442596276\n"
     ]
    }
   ],
   "source": [
    "list_wer_lower = []\n",
    "list_target_lower = []\n",
    "\n",
    "list_ref_lower = [ [t.lower() for t in x] for x in list_ref ]\n",
    "list_target_lower = [ [t.lower() for t in x] for x in list_target ]\n",
    "\n",
    "list_wer_lower = []\n",
    "\n",
    "for i in xrange( len(list_ref_lower )) :\n",
    "    list_wer_lower.append( wer(list_ref_lower[i], list_target_lower[i])  )\n",
    "print np.mean(list_wer_lower)    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.0\n"
     ]
    }
   ],
   "source": [
    "# just check\n",
    "list_wer_check = []\n",
    "\n",
    "for i in xrange( len(list_ref )) :\n",
    "    list_wer_check.append( wer(list_ref[i], list_ref[i])  )\n",
    "print np.mean(list_wer_check)    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
